package org.biogroovy.io.eutils

import groovy.util.logging.Slf4j
import groovy.util.slurpersupport.GPathResult
import groovy.util.slurpersupport.NodeChild

import org.biogroovy.conf.BioGroovyConfig
import org.biogroovy.eutils.EUtilsURLFactory
import org.biogroovy.io.AbsXmlSlurper
import org.biogroovy.io.IFetcher;
import org.biogroovy.io.NodeType
import org.biogroovy.models.Article
import org.biogroovy.models.Gene
import org.biogroovy.models.GeneOntology
import org.biogroovy.models.GeneOntologyEvidence
import org.biogroovy.models.GeneOntologyType

/**
 * This class uses the stream-based XmlSlurper and GPath to extract information
 * from an EntrezGene record.
 */
@Slf4j
public class EntrezGeneSlurper extends AbsXmlSlurper<Gene> {
	
	/** The name of the database. */
	static final String DATABASE_NAME = "entrezgene"
    /**
     * GPath expression fragments, keyed by the name of the value they locate.
     * The whole map is handed to parseData() from parse(). The references,
     * articles and goList entries are additionally evaluated by the dedicated
     * parse methods of this class, which prefix the fragment with "x." and run
     * it through Eval.x against a node. Because the fragments are executed as
     * Groovy code, element names containing '-' must be quoted.
     */
    static final Map<String, String> GPATH_MAP = [
            entrezGeneId: '"Entrezgene_track-info"."Gene-track"."Gene-track_geneid"',
            omimId      : '"Entrezgene_gene"."Gene-ref"."Gene-ref_db"."Dbtag".find{it."Dbtag_db" == "MIM"}."Dbtag_tag"."Object-id"."Object-id_id"',
            description : 'Entrezgene_summary',
            name        : 'Entrezgene_gene."Gene-ref"."Gene-ref_desc"',
            symbol      : 'Entrezgene_gene."Gene-ref"."Gene-ref_locus"',
            species     : 'Entrezgene_source.BioSource.BioSource_org."Org-ref"."Org-ref_taxname"',
            synonyms    : 'Entrezgene_gene."Gene-ref"."Gene-ref_syn"."Gene-ref_syn_E"*.text()',
            references  : '"Entrezgene_gene"."Gene-ref"."Gene-ref_db".Dbtag',
            articles    : 'depthFirst().findAll{it."Gene-commentary_type".@value=="generif"}',
            phenotypes  : 'depthFirst().findAll{it."Gene-commentary_type".@value=="phenotype"}',
            goList      : 'depthFirst().findAll{it."Gene-commentary"."Gene-commentary_heading" == "GeneOntology"}'

    ];

    /**
     * The node type of each field located through the GPath map; passed to
     * parseData() together with GPATH_MAP.
     * NOTE(review): the key sets differ from GPATH_MAP - 'accession' has a type
     * here but no GPath expression, while 'references', 'articles' and 'goList'
     * have expressions but no type. The latter three are read by the dedicated
     * parse methods of this class; how parseData() treats unmatched keys is not
     * visible from this file - confirm.
     */
    static final Map<String, NodeType> NODE_TYPE_MAP = [
            entrezGeneId: NodeType.INTEGER,
            accession   : NodeType.INTEGER,
            description : NodeType.STRING,
            name        : NodeType.STRING,
            symbol      : NodeType.STRING,
            species     : NodeType.STRING,
            synonyms    : NodeType.LIST,
            phenotypes  : NodeType.LIST,
            omimId      : NodeType.STRING

    ]
	
	/** Value sent as the 'tool' request parameter of fetch URLs; set from the eutils configuration in the constructor. */
	String tool = null;
	/** Value sent as the 'email' request parameter of fetch URLs; set from the eutils configuration in the constructor. */
	String email = null;
	
	/**
	 * Creates a slurper bound to the EntrezGene database and picks up the
	 * 'tool' and 'email' request parameters from the eutils section of the
	 * BioGroovy configuration.
	 * NOTE(review): ConfigObject returns an empty nested ConfigObject for keys
	 * that were never set, so an unconfigured tool/email probably ends up as the
	 * string form of an empty map rather than null - confirm and guard if needed.
	 */
	public EntrezGeneSlurper() {
		ConfigObject conf = BioGroovyConfig.getConfig()
		def eutilsSection = conf.eutils

		this.databaseName = DATABASE_NAME
		this.tool = eutilsSection.tool
		this.email = eutilsSection.email
	}



    /**
     * Populates the supplied Gene in place with the record fetched from EntrezGene.
     * The remote stream is closed once parsing finishes or fails.
     *
     * @param gene a gene object (must have the entrezGeneId property populated)
     * @throws IllegalArgumentException if the gene's entrezGeneId is null
     */
    public void read(Gene gene) {
        if (gene.entrezGeneId == null) {
            throw new IllegalArgumentException("The entrez gene id was null");
        }
        // getUrl is declared with a String id; toString() is a no-op when the id
        // already is a String and makes the call resolve when it is numeric.
        URL url = getUrl(gene.entrezGeneId.toString(), [tool: this.tool, email: this.email]);

        // withStream closes the stream even when parsing throws.
        url.openStream().withStream { InputStream stream ->
            read(stream, gene)
        }
    }

    /**
     * Reads a file containing a single gene record. The file stream is closed
     * once parsing finishes or fails.
     *
     * @param file The file to be parsed.
     * @return the first gene found in the file
     * @throws FileNotFoundException if the file does not exist
     */
    public Gene read(File file) {
        log.debug("Reading file: ${file.getAbsolutePath()}")
        if (!file.exists()) {
            throw new FileNotFoundException("The file was not found: ${file.getName()}")
        }
        // withInputStream closes the stream for us, including on failure.
        return file.withInputStream { InputStream stream -> read(stream) }
    }

    /**
     * Parses an input stream holding the record of a single gene. When the
     * stream yields several records only the first one is returned.
     *
     * @param is The input stream
     * @return the first parsed gene
     * @throws IndexOutOfBoundsException if the stream contained no gene record
     */
    public Gene read(InputStream is) {
        return readList(is).get(0);
    }


    /**
     * Fills a Gene from one Entrezgene node: first the mapped fields via
     * parseData(), then the database references, the articles and the GO terms.
     *
     * @param gene the gene to populate
     * @param node the Entrezgene node to read from
     */
    @Override
    public void parse(Gene gene, NodeChild node) {
        this.parseData(node, gene, GPATH_MAP, NODE_TYPE_MAP);
        this.parseDbReferences(node, gene);
        this.parseArticles(node, gene);
        this.parseGo(node, gene);
    }

    /**
     * Evaluates a GPath expression against a node and returns the result as a
     * string. The expression is executed as Groovy code through Eval, so it
     * must only ever come from trusted constants, never from external input.
     *
     * @param rootNode The node the expression is evaluated against.
     * @param gPathString The GPath expression, relative to the node.
     * @return the string form of whatever the expression evaluates to
     */
    String parseString(def rootNode, String gPathString) {
        def evaluated = Eval.x(rootNode, "x.${gPathString}")
        return evaluated
    }


    /**
     * Copies the cross-database references of a gene record into
     * gene.references, keyed by database name. The numeric Object-id_id is
     * preferred; when it is missing or empty the Object-id_str value is used.
     *
     * @param root the Entrezgene node
     * @param gene the gene whose references are populated
     */
    @Override
    protected void parseDbReferences(NodeChild root, Gene gene) {
        def dbTags = Eval.x(root, "x.${GPATH_MAP.references}").toList();

        // The per-tag paths are fixed, so they are navigated directly instead of
        // compiling a fresh script through Eval for every single Dbtag.
        dbTags.each { dbTag ->
            def objectId = dbTag.Dbtag_tag.'Object-id'

            String id = objectId.'Object-id_id'
            if (!id) {
                id = objectId.'Object-id_str'
            }
            String db = dbTag.Dbtag_db

            gene.references.putAt(db, id);
        }
    }

	/**
	 * Collects the PubMed articles attached to the "generif" commentaries of a
	 * gene record. Each PubMedId under a commentary with a non-empty text
	 * becomes one Article whose title is that commentary text.
	 * @param root the root node
	 * @param gene the Gene object that receives the articles.
	 */
    protected void parseArticles(NodeChild root, Gene gene) {
        def generifNodes = Eval.x(root, "x.${GPATH_MAP.articles}");

        generifNodes.each { commentary ->
            String title = commentary.'Gene-commentary_text';
            if (!title) {
                // Acts like "continue": commentaries without text are skipped.
                return
            }

            def pubmedIds = commentary.'Gene-commentary_refs'.'Pub'.'Pub_pmid'.'PubMedId'
            pubmedIds.each { pmid ->
                Article article = new Article();
                article.title = title
                article.pubmedId = pmid;
                gene.articles.add(article);
            }
        }
    }


	/**
	 * Parses the Gene Ontology entries of a gene record. For every node matched
	 * by the goList expression the first "Other-source" element supplies the GO
	 * id, name and evidence code; the GO type comes from the commentary label
	 * and decides which of the gene's GO lists receives the term. All PubMedId
	 * values found beneath the matched node are attached to the term.
	 * Nodes without any "Other-source" element are skipped.
	 * @param root the root object
	 * @param gene the gene
	 */
    protected void parseGo(NodeChild root, Gene gene) {
        def nodeSet = Eval.x(root, "x.${GPATH_MAP.goList}");
        log.debug("goterms: " + nodeSet.size());

        nodeSet.each { NodeChild geneNode ->
            log.debug("geneNode: ${geneNode.name()}");

            GPathResult result = geneNode."Entrezgene_properties"."Gene-commentary"."Gene-commentary_comment"."Gene-commentary"."Gene-commentary_source"."Other-source";
            if (result.size() == 0) {
                // Acts like "continue": nothing to extract from this node.
                return
            }

            // getAt(0) yields the first match whether there is one or several, so
            // a single match is processed exactly like the multi-match case.
            // NOTE(review): only the first Other-source is read, as before -
            // confirm that further matches are meant to be ignored.
            NodeChild node = result.getAt(0)
            log.debug("node: ${node.name()}")

            GeneOntology go = new GeneOntology();
            go.goId = parseInteger(node, "'Other-source_src'.'Dbtag'.'Dbtag_tag'.'Object-id'.'Object-id_id'");
            go.name = parseString(node, "'Other-source_anchor'");

            String tempGoType = parseString(geneNode, "'Gene-commentary'.'Gene-commentary_comment'.'Gene-commentary'.'Gene-commentary_label'");
            log.debug("goType: ${tempGoType}");
            go.type = GeneOntologyType.identifyType(tempGoType);

            go.evidence = parseGoEvidence(node);

            // Deep search written with depthFirst(): a bare '**' segment cannot
            // be spliced into an Eval expression because "x.**" does not parse.
            geneNode.depthFirst().findAll { it.name() == 'PubMedId' }.each { pmid ->
                go.pmidList.add(pmid.text());
            }

            if (go.type == GeneOntologyType.FUNCTION) {
                gene.goFunctionList.add(go)
            } else if (go.type == GeneOntologyType.PROCESS) {
                gene.goProcessList.add(go);
            } else if (go.type == GeneOntologyType.COMPONENT) {
                gene.goComponentList.add(go)
            } else {
                log.error("Unable to identify GO type: '${tempGoType}'")
            }
        }

    }

    /**
     * Extracts the evidence code from the "Other-source_post-text" element found
     * at or below the given node. The code is the trimmed text after the first ':'.
     * Declared protected rather than private so that the call from inside the
     * closure above also resolves when this class is subclassed.
     * @param source the "Other-source" node of a GO term
     * @return the evidence code, or null when the text is missing, has no ':'
     *         separated code, or names a code GeneOntologyEvidence rejects
     */
    protected GeneOntologyEvidence parseGoEvidence(NodeChild source) {
        def postText = source.depthFirst().find { it.name() == 'Other-source_post-text' }
        String rawText = postText?.text()
        String[] parts = rawText ? rawText.split(":") : new String[0]

        if (parts.length < 2) {
            log.error("Unable to identify GO evidence: '${rawText}'")
            return null
        }
        try {
            return GeneOntologyEvidence.valueOf(parts[1].trim())
        } catch (IllegalArgumentException ignored) {
            // Presumably an enum: valueOf rejects names it does not declare.
            // One unknown code should not abort parsing of the whole record.
            log.error("Unable to identify GO evidence: '${rawText}'")
            return null
        }
    }

    /**
     * Reads every Entrezgene record of an XML document into a Gene.
     * The stream is not closed here; that is left to the caller.
     *
     * @param inputStream The stream of XML.
     * @return a list of Gene objects, empty when the document holds no Entrezgene element
     * @throws IOException if there is a problem reading the XML.
     */
    public List<Gene> readList(InputStream inputStream) throws IOException {
        XmlSlurper slurper = new XmlSlurper(false, false);

        // DOCTYPE declarations are explicitly allowed (presumably because the
        // fetched records declare one), but nothing external may be pulled in:
        // no external DTD and no external general/parameter entities (XXE).
        slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
        slurper.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        slurper.setFeature("http://xml.org/sax/features/external-general-entities", false);
        slurper.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        slurper.setFeature("http://xml.org/sax/features/namespaces", false)

        GPathResult root = slurper.parse(inputStream)

        List<Gene> geneList = new ArrayList<>();
        root.'Entrezgene'.list().each { NodeChild node ->
            Gene gene = new Gene()
            parse(gene, node);
            geneList.add(gene);
        }

        return geneList;
    }

	/**
	 * Fetches a single gene record from EntrezGene. The remote stream is closed
	 * once parsing finishes or fails.
	 *
	 * @param id the identifier passed to getUrl as the 'id' request parameter
	 * @return the first gene of the fetched document
	 * @throws IOException if the record cannot be retrieved or read
	 */
	@Override
	public Gene fetch(String id) throws IOException {
		URL url = getUrl(id, [tool: this.tool, email: this.email]);
		// withStream closes the stream even when parsing throws.
		return url.openStream().withStream { InputStream stream -> read(stream) }
	}

	/**
	 * Builds the EFetch URL for a gene id. The request starts from the gene
	 * database, the id and the 'xml' return mode; the entries of paramMap are
	 * added on top and replace any of those three on a key clash.
	 *
	 * @param id the value of the 'id' request parameter
	 * @param paramMap additional request parameters
	 * @return the URL to fetch
	 */
	@Override
	public URL getUrl(String id, Map<String, String> paramMap) {
		Map<String, String> query = [db: EUtilsURLFactory.DB_GENE, id: id, retmode: 'xml']
		query.putAll(paramMap)

		String spec = EUtilsURLFactory.getURL(EUtilsURLFactory.EFETCH, query)
		return new URL(spec)
	}



	/**
	 * Fetches every gene record returned for the given id. The remote stream is
	 * closed once parsing finishes or fails.
	 *
	 * @param id the identifier passed to getUrl as the 'id' request parameter
	 * @return the genes of the fetched document
	 * @throws IOException if the records cannot be retrieved or read
	 */
	@Override
	public List<Gene> fetchAll(String id) throws IOException {
		URL url = getUrl(id, [tool: this.tool, email: this.email]);
		// withStream closes the stream even when parsing throws.
		return url.openStream().withStream { InputStream stream -> readList(stream) }
	}



	/**
	 * Creates a fresh, independently configured slurper.
	 *
	 * @return a new EntrezGeneSlurper
	 */
	@Override
	public IFetcher<Gene> getNewInstance() {
		new EntrezGeneSlurper()
	}

}
